*******************************************************************
*** COLLAPSE MEDPAR INTO HOSPITAL-YEAR, BY STAY TYPE, LOS, ETC.	***
*** Last edited: 1/20/2023                       			    ***
*******************************************************************

* Start a fresh, dated text log. Any log still open from an earlier
* (possibly interrupted) run is closed first; -capture- keeps that step
* from erroring when no log is open.
capture log close
log using "${CodePath}/log/medparsummary_$S_DATE.log", text replace

* Section switches: a section below runs only when its switch equals 1.
local medpar_summarize 1
local medpar_9915 1
local medparchars_fy9915 1
local medparchars_byLOSbin_fy9915 1
local medparchars_bytoperror_fy9914 1

*--------------------------------------------------
* USING MEDPAR: Count number of Medicare stays per fiscal year, and number of Medicare stays in each LOS bin
*--------------------------------------------------
if `medpar_summarize' == 1{
	* Purpose: build one row per hospital (prvdrnum) and fiscal year (fyear)
	* holding the number of retained claims (n_claims) and the number of
	* retained claims in each length-of-stay bin:
	*   n_claims_0 ... n_claims_20   exact LOS of 0 to 20
	*   n_claims_2150                LOS 21-50
	*   n_claims_51100               LOS 51-100
	*   n_claims_101                 LOS 101 or more
	* Input : calendar-year files med1999 ... med2015 (plus sudmed2012-2014).
	* Output: ${DataPath}/claims/n_claims_fyear9815.dta
	*         (file name kept unchanged so existing readers still find it).
	*
	* Every calendar-year file goes through the same pipeline; the only
	* year-specific steps are the sudgap append (2012-2014) and the clm_type
	* restriction (2011 onward), neither of which applies to 1999.

	forval y = 1999/2015{
		use "${McareDataPath}/harm/100pct/med/`y'/med`y'.dta", clear
		// add in sudgap medpar files
		if `y' >= 2012 & `y' <= 2014{
			append using "${SUDDataPath}/harm/100pct/med/`y'/sudmed`y'.dta"
		}

		// Restrict to PPS claims
		// (done before the indicator variables are created so they are only
		//  built for rows that are kept; the filters use raw variables only,
		//  so the result is unchanged)
			*keep if er_amt > 0 & !missing(er_amt)

			* keep only short-stay claims
			display "short stay claims"
			tab sslssnf, missing
			keep if sslssnf=="S"

			* drop GHO (HMO) paid claims
			display "GHO paid claims"
			tab ghopdcd, missing
			drop if ghopdcd=="1"

			* keep only PPS claims
			display "PPS claims"
			tab pps_ind, missing
			keep if pps_ind=="2"

			* remove "IME only" claims
			display "IME only claims"
			count if abs(drgprice-ime_amt)<.01
			drop if abs(drgprice-ime_amt)<.01

			if (`y'>=2011) {
				* limit to FFS inpatient claims
				display "claim types"
				tab clm_type, missing
				keep if clm_type=="60"
			}

			* remove units within a hospital
			display "units within a hospital"
			tab spclunit, missing
			keep if spclunit=="" | spclunit==" "

		* Fiscal year: discharges in October-December belong to the next
		* fiscal year. inrange() is used instead of ">= 10" because Stata
		* treats a missing value as larger than any number, so ">= 10" would
		* also move claims with a missing discharge date forward a year.
		gen mon_disch  = month(dschrgdt)
		gen fyear = `y'
		replace fyear = `y' + 1 if inrange(mon_disch, 10, 12)

		* Length of stay and its bin code. A missing LOS stays missing
		* (it must not fall into the 101+ bin), so such a claim is counted
		* in n_claims but in none of the n_claims_* bins.
		gen LOS = dschrgdt - admsndt
		gen LOSbin = LOS
		replace LOSbin = 2150  if inrange(LOS, 21, 50)
		replace LOSbin = 51100 if inrange(LOS, 51, 100)
		replace LOSbin = 101   if LOS >= 101 & !missing(LOS)

		gen claim = 1
		forval losnum = 0/20{
			gen n_claims_`losnum' = cond(LOSbin == `losnum', 1, 0)
		}
		gen n_claims_2150  = cond(LOSbin == 2150, 1, 0)
		gen n_claims_51100 = cond(LOSbin == 51100, 1, 0)
		gen n_claims_101   = cond(LOSbin == 101, 1, 0)

		gcollapse (sum) n_claims = claim n_claims_*, by(prvdrnum fyear)
		tempfile counts`y'
		save `counts`y''
	}

	* Stack the per-file counts in calendar-year order.
	use `counts1999', clear
	forval y = 2000/2015{
		append using `counts`y''
	}

	* A fiscal year spans two calendar-year files (Oct-Dec of one file,
	* Jan-Sep of the next), so after stacking a hospital-fiscal-year can
	* appear twice. Sum the partial counts so the saved file is unique on
	* (prvdrnum, fyear). A reader that already re-collapses gets the same totals.
	gcollapse (sum) n_claims n_claims_*, by(prvdrnum fyear)

	save "${DataPath}/claims/n_claims_fyear9815.dta", replace 
} //medpar_summarize


*--------------------------------------------------
* USING MEDPAR: COMBINE ALL 1999-2015 FILES (FISCAL YEARS 1999-2016)
*--------------------------------------------------
if `medpar_9915' == 1{
	* Purpose: build one claim-level file from the calendar-year MedPAR
	* files med1999 ... med2015 (plus sudmed2012-2014), restricted to the
	* claims kept by the filters below, with two added variables:
	*   fyear  fiscal year of discharge
	*   drg    numeric copy of drg_cd
	* Output: ${DataPath}/claims/medpar_fyear9915.dta
	*
	* Every year runs through the same pipeline and is saved to its own
	* tempfile; the tempfiles are stacked once at the end. This avoids a
	* preserve/restore of the ever-growing combined data on every iteration
	* and removes the separate copy of the pipeline for 1999 (the two
	* year-specific steps below do not apply to 1999).

	forval y = 1999/2015{
		use "${McareDataPath}/harm/100pct/med/`y'/med`y'.dta", clear
		// add in sudgap medpar files
		if `y' >= 2012 & `y' <= 2014{
			append using "${SUDDataPath}/harm/100pct/med/`y'/sudmed`y'.dta"	
		}

		* Fiscal year: discharges in October-December belong to the next
		* fiscal year. inrange() is used instead of ">= 10" because Stata
		* treats a missing value as larger than any number, so ">= 10" would
		* also move claims with a missing discharge date forward a year.
		gen mon_disch  = month(dschrgdt)
		gen fyear = `y'
		replace fyear = `y' + 1 if inrange(mon_disch, 10, 12)

		* NOTE(review): destring only creates drg when drg_cd is a string
		* holding purely numeric text; otherwise it prints a note and creates
		* nothing, leaving drg missing for that year after the append -
		* confirm against the log.
		destring drg_cd, gen(drg)


		// Restrict to PPS claims
			*keep if er_amt > 0 & !missing(er_amt)

			* keep only short-stay claims
			display "short stay claims"
			tab sslssnf, missing
			keep if sslssnf=="S"
			
			* drop GHO (HMO) paid claims
			display "GHO paid claims"
			tab ghopdcd, missing
			drop if ghopdcd=="1"
			
			* keep only PPS claims
			display "PPS claims"
			tab pps_ind, missing
			keep if pps_ind=="2"
			
			* remove "IME only" claims
			display "IME only claims"
			count if abs(drgprice-ime_amt)<.01
			drop if abs(drgprice-ime_amt)<.01
			
			if (`y'>=2011) {
				* limit to FFS inpatient claims
				display "claim types"
				tab clm_type, missing
				keep if clm_type=="60"
			}
			
			* remove units within a hospital
			display "units within a hospital"
			tab spclunit, missing
			keep if spclunit=="" | spclunit==" "

		tempfile claims`y'
		save `claims`y''
	}

	* Stack the years in calendar order; 1999 is the master so its variable
	* order, labels and formats take precedence, as before.
	use `claims1999', clear
	forval y = 2000/2015{
		append using `claims`y''
	}
	save "${DataPath}/claims/medpar_fyear9915.dta", replace
} //medpar_9915

 

*--------------------------------------------------
* USING MEDPAR: CALCULATE AVERAGE AND TOTAL INPATIENT CLAIM CHARACTERISTICS
*--------------------------------------------------
if `medparchars_fy9915' == 1{
	* Hospital-by-fiscal-year means and totals over all claims in the
	* combined claim-level file. Only the variables needed here are loaded.
	use dschrgdt admsndt totchrg pmt_amt drgprice sex age_cnt prvdrnum fyear ///
		using "${DataPath}/claims/medpar_fyear9915.dta", clear

	* length of stay = discharge date minus admission date
	gen los = dschrgdt - admsndt

	* female is 1 when sex is "2", 0 when sex is "1", and missing otherwise,
	* so its mean is a share among claims with sex coded "1" or "2"
	gen female = cond(sex == "2", 1, cond(sex == "1", 0, .))

	* one row per (prvdrnum, fyear): means first, then totals
	gcollapse ///
		(mean) mean_los      = los      ///
		       share_female  = female   ///
		       mean_totchrg  = totchrg  ///
		       mean_pmt_amt  = pmt_amt  ///
		       mean_drgprice = drgprice ///
		       mean_age      = age_cnt  ///
		(sum)  tot_totchrg   = totchrg  ///
		       tot_pmt_amt   = pmt_amt  ///
		       tot_drgprice  = drgprice ///
		, by(prvdrnum fyear)

	save "${DataPath}/claims/medparchars_fyear9915.dta", replace

} //medparchars_fy9915

*--------------------------------------------------
* USING MEDPAR: CALCULATE AVERAGE AND TOTAL INPATIENT CLAIM CHARACTERISTICS BY LOS
*--------------------------------------------------
if `medparchars_byLOSbin_fy9915' == 1{
	* Purpose: hospital-by-fiscal-year means, totals and claim counts computed
	* separately for each length-of-stay bin, saved wide: one row per
	* (prvdrnum, fyear) and one set of columns per bin, named <stat>_<bin code>.
	* Bin codes: the exact LOS for 0-20, 2150 for 21-50, 51100 for 51-100,
	* and 101 for 101 or more.
	use dschrgdt admsndt totchrg pmt_amt drgprice prvdrnum fyear using "${DataPath}/claims/medpar_fyear9915.dta", clear

	gen LOS = dschrgdt - admsndt

	* Stata treats a missing value as larger than any number, so a claim with
	* a missing date would satisfy "LOS >= 101" and be reported as a 101+ day
	* stay. Such claims cannot be assigned to any bin (and reshape does not
	* accept a missing j), so they are left out of this by-LOS summary.
	drop if missing(LOS)

	gen LOSbin = LOS
	replace LOSbin = 2150  if inrange(LOS, 21, 50)
	replace LOSbin = 51100 if inrange(LOS, 51, 100)
	replace LOSbin = 101   if LOS >= 101

	gen claim = 1
	gcollapse (mean) mean_totchrg = totchrg mean_pmt_amt = pmt_amt mean_drgprice = drgprice ///
			  (sum) tot_totchrg = totchrg tot_pmt_amt = pmt_amt tot_drgprice = drgprice n_claims = claim , by(prvdrnum fyear LOSbin)

	* Add a trailing underscore to every statistic so the reshaped columns
	* read <stat>_<bin>; the id variables get their original names back.
	rename * *_
	rename prvdrnum_ prvdrnum
	rename fyear_ fyear
	rename LOSbin_ LOSbin
	reshape wide *_, i(prvdrnum fyear) j(LOSbin)
	save "${DataPath}/claims/medparchars_byLOSbin_fyear9915.dta", replace

} //medparchars_byLOSbin_fy9915


*--------------------------------------------------
* USING MEDPAR: TOP 20 IMPROPER PAYMENTS (BASED ON 2010 CERT REPORT)
*--------------------------------------------------
if `medparchars_bytoperror_fy9914' == 1{
	* Purpose: per hospital (prvdrnum) and fiscal year, count claims and sum
	* payments (pmt_amt) in each of 20 DRG groups, in any of the 20 groups,
	* and outside all of them.
	* Output: ${DataPath}/claims/medpar_top20error_fyear9914.dta
	* NOTE(review): the DRG numbers below look like MS-DRG codes; confirm
	* they identify the same conditions in every fiscal year of the input.
	use drg_cd prvdrnum pmt_amt fyear using "${DataPath}/claims/medpar_fyear9915.dta", clear
	destring drg_cd, gen(drg)

	* 0/1 flag per DRG group. A missing drg gives 0 in every flag: it equals
	* no listed code and fails each "<= upper bound" test.
	gen drg_toperror1 = drg == 469 | drg == 470
		label var drg_toperror1 "major joint replacement (469-470)"
	gen drg_toperror2 = drg >= 242 & drg <= 244
		label var drg_toperror2 "permanent cardiac pacemaker (242-244)"
	gen drg_toperror3 = drg == 246 | drg == 247
		label var drg_toperror3 "drug-eluting stent (246-247)"
	gen drg_toperror4 = drg == 871 | drg == 872
		label var drg_toperror4 "sepsis (871-872)"
	gen drg_toperror5 = drg == 313
		label var drg_toperror5 "chest pain (313)"
	gen drg_toperror6 = drg >= 377 & drg <= 379
		label var drg_toperror6 "gi hemorrhage (377-379)"
	gen drg_toperror7 = drg >= 329 & drg <= 331
		label var drg_toperror7 "major bowel procedures (329-331)"
	gen drg_toperror8 = drg >= 177 & drg <= 179
		label var drg_toperror8 "respiratory infections (177-179)"
	gen drg_toperror9 = drg == 391 | drg == 392
		label var drg_toperror9 "esophagitis  and other GI disorders (391-392)"
	gen drg_toperror10 = drg == 689 | drg == 690
		label var drg_toperror10 "kidney and UTI (689 - 690)"
	gen drg_toperror11 = drg == 640 | drg == 641
		label var drg_toperror11 "nutritional and metabolic (640-641)"
	* label corrected to the range the code selects (was "(291-293)")
	gen drg_toperror12 = drg >= 682 & drg <= 684
		label var drg_toperror12 "renal failure (682-684)"
	gen drg_toperror13 = drg == 312
		label var drg_toperror13 "syncope and collapse (312)"
	gen drg_toperror14 = drg >= 291 & drg <= 293
		label var drg_toperror14 "heart failure and shock (291-293)"
	* label corrected to the range the code selects (was "(308-309)")
	gen drg_toperror15 = drg >= 308 & drg <= 310
		label var drg_toperror15 "cardiac arrhythmia (308-310)"
	gen drg_toperror16 = drg >= 193 & drg <= 195
		label var drg_toperror16 "pneumonia and pleurisy (193-195)"
	gen drg_toperror17 = drg >= 280 & drg <= 282
		label var drg_toperror17 "AMI (280-282)"
	gen drg_toperror18 = drg >= 190 & drg <= 192
		label var drg_toperror18 "COPD (190-192)"
	gen drg_toperror19 = drg >= 480 & drg <= 482
		label var drg_toperror19 "hip and femur except major joint (480-482)"
	gen drg_toperror20 = drg >= 64 & drg <= 66
		label var drg_toperror20 "intracranial hemorrhage or cerebral infarction (064-066)"

	* Payment attributed to each group: pmt_amt when the claim is in the
	* group, 0 otherwise. The flag and payment variable names are collected
	* explicitly so the row totals below do not depend on variable order.
	local flagvars
	local pmtvars
	forval n_error = 1/20{
		gen drg_toperror`n_error'_pmt = cond(drg_toperror`n_error' == 1, pmt_amt, 0)
		local flagvars `flagvars' drg_toperror`n_error'
		local pmtvars  `pmtvars' drg_toperror`n_error'_pmt
	}

	* The 20 code ranges do not overlap, so drg_anytop20 is 0 or 1 per claim.
	* rowtotal() treats a missing payment as 0.
	egen drg_anytop20 = rowtotal(`flagvars')
	egen drg_anytop20_pmt = rowtotal(`pmtvars')


	* drg_top* picks up the 20 flags and the 20 payment variables
	gen claim = 1
	gcollapse (sum) n_claims = claim tot_pmt_amt = pmt_amt  drg_top* drg_anytop20 drg_anytop20_pmt, by(prvdrnum fyear)
	label var drg_anytop20 "n any top 20 error drgs"

	* claims and payments outside the 20 groups
	gen n_claims_notop20 = n_claims - drg_anytop20
	label var n_claims_notop20 "n non- top 20 error drgs"

	gen pmt_amt_notop20 = tot_pmt_amt - drg_anytop20_pmt
	label var pmt_amt_notop20 "revenue, non- top 20 error drgs"

	save "${DataPath}/claims/medpar_top20error_fyear9914.dta", replace 
} // medparchars_bytoperror_fy9914
 

* Close the text log opened at the top of this do-file.
log close